
/*
 * copyByOffset
 *
 * Each work-group copies a run of elements from its own fixed-stride slice
 * of gin into a packed position in gout.
 *
 *   gin      - input buffer; work-group g reads starting at element g * 1024.
 *              Element 0 is overwritten at the end of the kernel (see below).
 *   gout     - output buffer; work-group g writes starting at element
 *              goffsets[g - 1] (or 0 for g == 0).
 *   goffsets - one entry per work-group; entry g is used as the end position
 *              of group g's output range and entry g - 1 as its start.
 *              Presumably an inclusive running total of per-group element
 *              counts -- confirm against the host code that fills it.
 *
 * NOTE(review): the input stride is the literal 1024, not get_local_size(0);
 * this looks like it assumes a particular launch configuration -- confirm
 * with the caller before changing either.
 */
__kernel void copyByOffset(__global uint * gin, __global uint * gout,__global uint * goffsets ){

        const int item  = get_global_id(0);
        const int lane  = get_local_id(0);
        const int group = get_group_id(0);

        // Start of this group's output range: 0 for the first group,
        // otherwise the previous group's entry in goffsets.
        int dst_begin = 0;
        if ( group != 0 ){
            dst_begin = goffsets[group - 1];
        }

        // Number of elements this group copies.
        const int count = goffsets[group] - dst_begin;

        __global uint * src = gin + group * 1024;
        __global uint * dst = gout + dst_begin;

        // Work-items whose local index falls outside the range do nothing here.
        if ( lane < count ){
            dst[lane] = src[lane];
        }

        // The work-item with global id 0 stores the last goffsets entry into
        // gin[0], after its own copy step above.
        if( item == 0 ){
            gin[0] = goffsets[get_num_groups(0) - 1];
        }
}
